* Session setup: close any log left open by an earlier run, clear previously
* defined programs so the definitions below can be re-created, move to the
* data folder, and start a fresh log that overwrites the previous one.
* NOTE(review): the cd target looks like a placeholder path - confirm it is
* edited to the folder that holds full_data.dta before running.
capture log close
capture program drop _all
cd "DATA LOCATION"
log using output_file.log, replace


program define main

	* Entry point. Runs the three stages in order:
	*   final_setup - writes the intermediate datasets
	*   analysis    - reads those datasets and prints figures, tables, models
	*   deidentify  - writes the de-identified copy of the full data
	final_setup
	analysis
	deidentify

end



program final_setup

	* Builds every intermediate dataset that the analysis program reads.
	* Each section reloads full_data.dta, reshapes it, and saves one file:
	*   figures_table1_data.dta, table2_model.dta, table2_mitigation.dta,
	*   table2_demographics.dta, table3_data_all.dta, table3_data_3and6.dta,
	*   counts_for_paper.dta

	*----------------------------------------------------------------
	* Figures 1-2 and Table 1: totals and rates by end_date
	*----------------------------------------------------------------
	use "full_data.dta", clear

	* Cases and in-person counts multiplied by the distancing variables.
	* NOTE(review): assumes distance6ft and distance3ft are 0/1 indicators,
	* so each product keeps the value only for that group - confirm.
	gen Student_Cases_6ft=Student_Cases*distance6ft
	gen Student_Cases_3ft=Student_Cases*distance3ft
	gen Staff_Cases_6ft=Staff_Cases*distance6ft
	gen Staff_Cases_3ft=Staff_Cases*distance3ft
	gen Student_inperson_6ft = inperson*distance6ft
	gen Student_inperson_3ft = inperson*distance3ft
	gen Staff_inperson_6ft = staffinperson*distance6ft
	gen Staff_inperson_3ft = staffinperson*distance3ft

	* Row counter; after the collapse it holds the number of rows per end_date.
	gen count=1

	collapse (sum) comm_rate_weight inperson staffinperson Student_Cases Student_Cases_6ft Student_Cases_3ft Staff_Cases Staff_Cases_6ft Staff_Cases_3ft Student_inperson_6ft  Student_inperson_3ft Staff_inperson_6ft Staff_inperson_3ft count, by(end_date)

	* Rates are cases over in-person count, scaled by 100000 and divided by 7.
	* NOTE(review): presumably a daily rate per 100,000 for 7-day periods - confirm.
	* A zero denominator yields a missing rate rather than an error.
	foreach var in 6ft 3ft {
		gen Student_Rate_`var' = (Student_Cases_`var'/Student_inperson_`var')*100000/7
		gen Staff_Rate_`var' = (Staff_Cases_`var'/Staff_inperson_`var')*100000/7

	}

	gen student_all = (Student_Cases/inperson)*100000/7
	gen staff_all = (Staff_Cases/staffinperson)*100000/7
	gen community_rate= comm_rate_weight/ inperson

	keep Student_Rate_6ft Staff_Rate_6ft Student_Rate_3ft Staff_Rate_3ft student_all staff_all community_rate end_date

	save "figures_table1_data.dta", replace

	*----------------------------------------------------------------
	* Table 2, first rows: learning model, one row per district
	*----------------------------------------------------------------
	use "full_data.dta", clear

	* Copy of inperson so the collapse can take both its mean and its max.
	gen mean_inperson=inperson

	collapse (mean) DISTRICT_TOT mean_inperson (max) inperson staffinperson distance6ft distance3ft, by(District_Name)
	gen hybridpercent=mean_inperson/DISTRICT_TOT
	* Stata treats missing as larger than any number, so without the
	* qualifier a district with missing hybridpercent would be coded full=1.
	* With it, such districts get full=missing and fall in neither group.
	gen full=hybridpercent>=.8 if !missing(hybridpercent)
	gen all=1

	save "table2_model.dta", replace

	*----------------------------------------------------------------
	* Table 2, second rows: mitigation measures, one row per district
	*----------------------------------------------------------------
	use "full_data.dta", clear

	* cohort2 is 1 for cohort 3 or 4, 0 for cohort 5, missing otherwise.
	gen cohort2=1 if cohort==3 | cohort==4
	replace cohort2=0 if cohort==5
	gen distance_ov3_un6=distance_under6-distance3ft
	* samemode is 1 for Yes and missing for anything else.
	gen samemode=1 if Elementarymiddlehighsamemode=="Yes"

	* NOTE(review): (first) keeps the first row per district even when that
	* value is missing; firstnm would skip missings - confirm which is wanted.
	collapse (first) SchoolDistrictModel samemode studentmasks staffmasks distance6ft distance3ft distance_under6 distance_ov3_un6 deepcleaning cohort2 symptomscreen ventilation testing vaccination  (max) inperson staffinperson, by(District_Name)
	* NOTE(review): the next two lines force vaccination and staffmasks to 1
	* for every district, including missing values - presumably deliberate,
	* confirm against the study protocol.
	replace vaccination=1 if vaccination !=1
	replace staffmasks=1 if staffmasks!=1
	gen all =1

	save "table2_mitigation.dta", replace

	*----------------------------------------------------------------
	* Table 2, third rows: demographics, one row per district
	*----------------------------------------------------------------
	use "full_data.dta", clear

	collapse (max) White Black Hispanic Asian Other Total est_population_5_17 est_population_5_17_poverty inperson staffinperson distance6ft distance3ft, by(District_Name)
	gen all=1
	save "table2_demographics.dta", replace

	*----------------------------------------------------------------
	* Table 3: row-level case rates, all rows and the 3ft/6ft subset
	*----------------------------------------------------------------
	use "full_data.dta", clear

	gen inperson_percent=(IN_PERSON_TOT+HYBRID_TOT)/DISTRICT_TOT
	gen student_case_rate=(Student_Cases/inperson)*100000/7
	gen staff_case_rate=(Staff_Cases/staffinperson)*100000/7
	save "table3_data_all.dta", replace
	keep if distance6ft==1 | distance3ft==1
	save "table3_data_3and6.dta", replace

	*----------------------------------------------------------------
	* Overall totals quoted in the paper: a single-row dataset
	*----------------------------------------------------------------
	use "full_data.dta", clear

	gen Student_Cases_6ft=Student_Cases*distance6ft
	gen Student_Cases_3ft=Student_Cases*distance3ft
	gen Staff_Cases_6ft=Staff_Cases*distance6ft
	gen Staff_Cases_3ft=Staff_Cases*distance3ft

	collapse (sum) inperson staffinperson Student_Cases Staff_Cases Student_Cases_6ft Student_Cases_3ft Staff_Cases_6ft Staff_Cases_3ft

	save "counts_for_paper.dta", replace


	end


program analysis

	* Reads the datasets written by final_setup and prints, in order:
	* Figures 1 and 2, Table 1, the three parts of Table 2, the four rows
	* of Table 3, and the overall counts. Results go to the Results window
	* and the open log; no dataset is saved here.

	use "figures_table1_data.dta", clear

	************************************
	* Make Versions of Figures 1 and 2 *
	************************************

	* Each graph gets its own name so that drawing Figure 2 does not
	* replace Figure 1 in memory; replace allows the program to be re-run.
	graph twoway scatter Student_Rate_6ft Staff_Rate_6ft Student_Rate_3ft Staff_Rate_3ft end_date, c(l l l l) title("Figure 1") name(figure1, replace)
	graph twoway scatter student_all staff_all community_rate end_date, c(l l l) title("Figure 2") name(figure2, replace)

	*************
	*  Table 1 *
	************

	keep Student_Rate_6ft Staff_Rate_6ft Student_Rate_3ft Staff_Rate_3ft end_date

	* NOTE(review): contents() is the table syntax from before Stata 17;
	* on Stata 17 or later this line may need a version prefix - confirm
	* against the Stata release used for the paper.
	table end_date, contents(mean Student_Rate_6ft mean Student_Rate_3ft  mean Staff_Rate_6ft mean Staff_Rate_3ft) format(%9.3g)

	*******************************
	* Table 2: First Rows (Model) *
	*******************************
	use  "table2_model.dta", clear

	* For each district subset, print the summed student and staff in-person
	* counts, first for full==1 and then for full==0.
	foreach subset in all distance6ft distance3ft {
		preserve
		di "`subset'"
		keep if `subset'==1
		foreach fullflag in 1 0 {
			foreach headcount in inperson staffinperson {
				qui sum `headcount' if full==`fullflag'
				di r(sum)
			}
		}
		restore
	}

	************************************
	* Table 2: Second Rows (Mitigation)*
	************************************

	use "table2_mitigation.dta", clear

	* For each district subset and each mitigation measure, print the number
	* of districts with the measure and their summed in-person counts.
	* The two loops use distinct macro names so the inner loop cannot
	* overwrite the subset name held by the outer loop.
	foreach subset in all distance6ft distance3ft {
		preserve
		di "`subset'"
		keep if `subset'==1

		foreach measure in samemode studentmasks staffmasks distance6ft distance3ft distance_under6 distance_ov3_un6 deepcleaning cohort2 symptomscreen ventilation testing vaccination {
			di "`measure'"
			count if `measure'==1
			qui sum inperson if `measure'==1
			di r(sum)
			qui sum staffinperson if `measure'==1
			di r(sum)
		}
		restore
	}

	************************************
	* Table 2: Third Rows (Demographics)*
	************************************

	use "table2_demographics.dta", clear

	* For each district subset, total the demographic counts into one row
	* and print each group as a share of Total, plus the poverty share.
	foreach subset in all distance6ft distance3ft {
		preserve
		di "`subset'"
		keep if `subset'==1

		collapse (sum) White Black Hispanic Asian Other Total est_population_5_17 est_population_5_17_poverty
		gen white_pct=White/Total
		sum white_pct
		gen black_pct=Black/Total
		sum black_pct
		gen hispanic_pct=Hispanic/Total
		sum hispanic_pct
		gen asian_pct=Asian/Total
		sum asian_pct
		gen other_pct=Other/Total
		sum other_pct
		gen pov_pct=est_population_5_17_poverty/est_population_5_17
		sum pov_pct
		restore
	}

	************
	* Table 3 *
	************

	* Every row fits negative binomial models of the case rate on distance6ft
	* with end_date indicators, standard errors clustered on District_Code,
	* reported as incidence-rate ratios. Within a row the order is: student
	* with community rate, student without, staff with, staff without.

	local demog pct_Black pct_Hispanic pct_Asian pct_Other est_population_5_17_poverty_pct

	use "table3_data_3and6.dta", clear

	di "Row 1"

	foreach outcome in student_case_rate staff_case_rate {
		nbreg `outcome' distance6ft case_rate_per100k_zip i.end_date, vce(cluster District_Code) irr
		nbreg `outcome' distance6ft i.end_date, vce(cluster District_Code) irr
	}

	di "Row 2"

	* Same models with the demographic covariates added.
	foreach outcome in student_case_rate staff_case_rate {
		nbreg `outcome' distance6ft case_rate_per100k_zip `demog' i.end_date, vce(cluster District_Code) irr
		nbreg `outcome' distance6ft `demog' i.end_date, vce(cluster District_Code) irr
	}

	di "Row 3"

	* Row 1 models after dropping rows with testing equal to 1.
	drop if testing==1
	foreach outcome in student_case_rate staff_case_rate {
		nbreg `outcome' distance6ft case_rate_per100k_zip i.end_date, vce(cluster District_Code) irr
		nbreg `outcome' distance6ft i.end_date, vce(cluster District_Code) irr
	}

	use  "table3_data_all.dta", clear

	di "Row 4"

	* Row 1 models on the dataset that is not restricted to 3ft or 6ft rows.
	foreach outcome in student_case_rate staff_case_rate {
		nbreg `outcome' distance6ft case_rate_per100k_zip i.end_date, vce(cluster District_Code) irr
		nbreg `outcome' distance6ft i.end_date, vce(cluster District_Code) irr
	}


	*******************
	*  Other Analyses *
	*******************

	use "counts_for_paper.dta", clear
	di "Counts for Paper"

	* The dataset has a single row of totals, so each mean shown is the total.
	sum

end

program deidentify

	* Writes deidentified_data: the analysis variables from full_data.dta
	* with the district identifier replaced by a randomly ordered integer id.

	use "full_data.dta", clear
	* Missing testing is recoded to 0.
	replace testing = 0 if testing==.

	* capture keeps the program running if district_id is not in the data.
	capture drop district_id

	* One random draw per row, averaged within district, gives each district
	* a random sort key. Both are stored as double: in float precision two
	* districts could end up with the same key and be merged into one id.
	* No seed is set, so the id assignment differs from run to run.
	* NOTE(review): setting a published seed would let anyone holding the
	* district list rebuild the mapping - confirm before adding one.
	gen double random = runiform()
	egen double mean_random = mean(random), by(District_Code)
	* District_Code is only a tie-breaker: ids follow mean_random order, and
	* two districts can never share an id even if their keys coincide.
	egen district_id = group(mean_random District_Code)

	keep district_id distance6ft distance3ft Student_Cases Staff_Cases inperson staffinperson case_rate_per100k_zip DISTRICT_TOT pct_Black pct_Hispanic pct_Asian pct_Other est_population_5_17_poverty_pct end_date testing

	sort district_id end_date
	save "deidentified_data", replace

end


* Run the whole pipeline, then close the log opened at the top of the file.
* capture noisily still shows any error message but lets the log be closed
* before the do-file stops; the original return code is then passed on.
capture noisily main
local main_rc = _rc
capture log close
if `main_rc' exit `main_rc'

